# coding:utf-8
# Funny (qq:516110288)

# 1.
# Match a landline phone number of the form 0713-xxxxxx (area code 0713).
import re

a = "0713-666666"
# Raw string avoids invalid-escape DeprecationWarnings; \d+ restricts the
# match to digits (the original \w+ would also accept letters/underscore).
print(re.search(r"^0713-\d+", a).group())

# 2.
# US-style phone numbers: the area code may be wrapped in parentheses or
# followed by a dash, and is optional, e.g. 800-555-1212, 555-1212,
# (800)555-1212.
a = "800-555-1212,555-1212,(800)555-1212"
# Raw string for the pattern; the longer area-code alternatives must come
# before the bare 7-digit form so they win the left-to-right alternation.
print(re.findall(r"\d{3}-\d{3}-\d{4}|\(\d{3}\)\d{3}-\d{4}|\d{3}-\d{4}", a))
# 3.
# 选作题:
# 实现一个爬虫代码

import requests
from bs4 import BeautifulSoup
import lxml
import os
import time

# Destination directory under which one folder per photo group is created.
DST_DIR = "D:/xinggan/"
# Request headers shared by every HTTP call below.
# NOTE(review): the Referer presumably satisfies the image host's hotlink
# check — confirm against the site's behavior.
headers = {
    'User-Agent': "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/22.0.1207.1 Safari/537.1",
    "Referer": "http://www.mm131.com/mingxing/"}


def mkdir(father_dir_name, path):
    """Create the child directory *path* under *father_dir_name*.

    Returns the full path of the newly created directory, or None when the
    directory already exists (callers use this to skip already-downloaded
    groups) or when creation fails.
    """
    try:
        path = path.strip()
        child_dir_name = os.path.join(father_dir_name, path)
        if os.path.exists(child_dir_name):
            print("文件夹%s已存在!" % (path))
            return None
        print("新建文件夹:%s" % (path))
        os.makedirs(child_dir_name)
        return child_dir_name
    except OSError:
        # Narrowed from a bare `except:` — only filesystem failures should
        # be swallowed, not programming errors like a bad argument type.
        return None


def get_one_group(url, group_name):
    """Download every image page of one photo group.

    url        -- the group's first page, e.g. http://www.mm131.com/mingxing/2016.html
    group_name -- used as the folder name created under DST_DIR
    """
    # timeout added: without it requests.get can block forever on a stalled
    # connection and hang the whole crawl.
    html = requests.get(url, headers=headers, timeout=10)
    soup = BeautifulSoup(html.content, "html.parser")
    # The pagination bar's second-to-last <a> holds the last page number.
    pages_max = soup.find("div", class_="content-page").find_all("a")[-2].text

    dir_name = mkdir(DST_DIR, group_name)
    if dir_name:  # None means the folder already exists -> skip this group
        get_one_page_img(soup, dir_name)
        base_url = url.split(".html")[0]  # hoisted out of the loop
        for page in range(2, int(pages_max) + 1):
            # Subsequent pages follow the pattern <base>_<page>.html
            page_html = requests.get("%s_%s.html" % (base_url, page),
                                     headers=headers, timeout=10)
            page_soup = BeautifulSoup(page_html.content, "html.parser")
            get_one_page_img(page_soup, dir_name)


def get_one_page_img(soup, dir_name):
    """Find the single display image on one parsed page and download it
    into *dir_name*.  (The filename comes from the URL, see download_img.)
    """
    img = soup.find("div", class_="content-pic").find("img")
    # The original also read img["alt"] into an unused local; removed.
    download_img(img["src"], dir_name)


def get_all_img(start_url):
    """Crawl the listing page at *start_url* and download every linked
    photo group, pausing briefly between groups.
    """
    # timeout added so a stalled connection cannot hang the crawler.
    start_html = requests.get(start_url, headers=headers, timeout=10)
    soup = BeautifulSoup(start_html.content, "lxml")

    # Collect the link to every group's display page.
    all_img_href = soup.find("div", class_="main").find("dl", class_="list-left public-box").find_all("a", attrs={
        "target": "_blank"}, recursive=True)
    for a in all_img_href:
        print(a["href"])
        print(a.text)
        get_one_group(a["href"], a.text)
        time.sleep(0.5)  # be polite to the server between groups


def download_img(img_url, dir_name):
    """Save one image into *dir_name*, named after the last URL segment.

    e.g. img_url = http://img1.mm131.me/pic/2016/1.jpg -> file "1.jpg"
    """
    name = os.path.join(dir_name, img_url.split('/')[-1])
    # timeout added so a stalled download cannot hang the crawler.
    img = requests.get(img_url, headers=headers, timeout=10)
    # 'wb', not the original 'ab': appending on a re-run would duplicate the
    # bytes and corrupt the image file.
    with open(name, 'wb') as f:
        f.write(img.content)


# Entry point: crawl the whole listing.  NOTE(review): this runs on import
# too — there is no `if __name__ == "__main__":` guard in this script.
get_all_img("http://www.mm131.com/mingxing")
# get_one_group("http://www.mm131.com/mingxing/1499.html")


#download_img("http://img1.mm131.me/pic/3095/1.jpg")













